import base64
import json
import random
import re
import time
from pathlib import Path
from pprint import pprint
from zlib import decompress

import requests
from langchain_community.document_loaders import JSONLoader

def compress(e):
    """Decode and inflate a base64 + zlib payload into a Python object.

    NOTE(review): despite the name, this function *de*compresses; the name
    is kept unchanged so existing callers keep working.

    Args:
        e: mapping with a 'compressed_data' key holding a base64-encoded
           string of zlib-compressed JSON.

    Returns:
        The deserialized JSON value (dict, list, etc.).
    """
    raw = base64.b64decode(e['compressed_data'])
    return json.loads(decompress(raw))

# 示例使用
# res = requests.get("https://ppmark.cn/datas/platform-5.json")
# # print(res.json() )
# params = res.json()
# result = compress(params)
# print(result)

# # 以utf-8编码写入文件
# with open('/tmp/poju.json', 'w', encoding='utf-8') as f:
#     json.dump(result, f, ensure_ascii=False, indent=4)

# loader = JSONLoader(
#     file_path='/tmp/poju.json',
#     jq_schema='.[].text',
#     text_content=False)

# data = loader.load()
# pprint(data)

# Load the previously dumped topic list; each entry is expected to carry a
# 'topic_id' field (schema assumed from the usage below — TODO confirm).
with open('/tmp/poju.json', 'r', encoding='utf-8') as f:
    data = json.load(f)

payload = {}
# Headers captured from a logged-in browser session. The cookie,
# x-signature and x-timestamp values are session-specific and will expire;
# refresh them when requests start failing.
headers = {
  'authority': 'api.zsxq.com',
  'accept': 'application/json, text/plain, */*',
  'accept-language': 'zh-CN,zh;q=0.9',
  'cookie': 'zsxq_access_token=29020683-6C36-824A-0CDB-DFE1BCAA40F0_8087587E2B72A80A; zsxqsessionid=e0a0ce9bd0aac2318a0e69c036033f9b; sensorsdata2015jssdkcross=%7B%22distinct_id%22%3A%22281884855181%22%2C%22first_id%22%3A%2218a937b8e68165-0c0ef5acfbff28-11462c6c-2073600-18a937b8e6916c2%22%2C%22props%22%3A%7B%22%24latest_traffic_source_type%22%3A%22%E7%9B%B4%E6%8E%A5%E6%B5%81%E9%87%8F%22%2C%22%24latest_search_keyword%22%3A%22%E6%9C%AA%E5%8F%96%E5%88%B0%E5%80%BC_%E7%9B%B4%E6%8E%A5%E6%89%93%E5%BC%80%22%2C%22%24latest_referrer%22%3A%22%22%7D%2C%22identities%22%3A%22eyIkaWRlbnRpdHlfY29va2llX2lkIjoiMThhOTM3YjhlNjgxNjUtMGMwZWY1YWNmYmZmMjgtMTE0NjJjNmMtMjA3MzYwMC0xOGE5MzdiOGU2OTE2YzIiLCIkaWRlbnRpdHlfbG9naW5faWQiOiIyODE4ODQ4NTUxODEifQ%3D%3D%22%2C%22history_login_id%22%3A%7B%22name%22%3A%22%24identity_login_id%22%2C%22value%22%3A%22281884855181%22%7D%2C%22%24device_id%22%3A%2218a937b8e68165-0c0ef5acfbff28-11462c6c-2073600-18a937b8e6916c2%22%7D',
  'origin': 'https://wx.zsxq.com',
  'referer': 'https://wx.zsxq.com/',
  'sec-ch-ua': '"Not_A Brand";v="8", "Chromium";v="120", "Google Chrome";v="120"',
  'sec-ch-ua-mobile': '?0',
  'sec-ch-ua-platform': '"Linux"',
  'sec-fetch-dest': 'empty',
  'sec-fetch-mode': 'cors',
  'sec-fetch-site': 'same-site',
  'user-agent': 'Mozilla/5.0 (X11; Linux x86_64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/120.0.0.0 Safari/537.36',
  'x-request-id': '667083f17-4ebd-35c9-f033-d776a325af7',
  'x-signature': '1f72ce8c56fab43b4db7280050b086cbf5a369ef',
  'x-timestamp': '1706860998',
  'x-version': '2.50.0'
}

for article in data:
    topic_id = article['topic_id']
    print(f"正在保存{topic_id}")
    # Skip topics already saved by a previous run. (Replaces the original
    # bare `except:` open-to-probe idiom with an explicit existence check.)
    if Path(f'/c/AI/AI破局/{topic_id}.txt').exists():
        continue
    url = f"https://api.zsxq.com/v2/topics/{topic_id}/info"

    # timeout added so a stalled connection cannot hang the whole crawl
    response = requests.get(url, headers=headers, data=payload, timeout=30)
    art_data = response.json()
    try:
        # Hoist the deep lookups once; any missing key lands in the
        # best-effort handler below, same as before.
        topic = art_data['resp_data']['topic']
        body_text = topic['talk']['text']
        text = f"作者：{topic['talk']['owner']['name']}\n发表日期：{topic['create_time']}\n正文：{body_text}"
        with open(f'/c/AI/AI破局/{topic_id}.txt', 'w', encoding='utf-8') as f:
            f.write(text)
        # Mirror any linked feishu.cn documents alongside the topic text.
        if "feishu.cn" in body_text:
            links = re.findall(r'href=\"https.*?feishu.cn.*?\"', body_text)
            print(links)
            with open(f'/c/AI/AI破局/{topic_id}-feishu.txt', 'w', encoding='utf-8') as f:
                for link in links:
                    # link[6:-1] strips the leading 'href="' and trailing '"'
                    res = requests.get(link[6:-1], timeout=30)
                    f.write(res.text)
    except Exception as e:
        # Best-effort: dump the raw response for diagnosis, then continue
        # with the next topic.
        print(art_data)
        print(f"获取{topic_id}失败:{e}")
    # Random 10-20 s pause between requests to avoid rate limiting.
    # (The original comment claimed 3-5 s; the code has always used 10-20.)
    time.sleep(random.randint(10, 20))